Quality

Preamble

Dependencies

Code
library(dplyr)
library(tidyr)
library(scran)
library(scater)
library(Matrix)
library(ggplot2)
library(patchwork)
library(RColorBrewer)
library(SpatialExperiment)

Loading

Code
dir <- file.path("..", "data", "Visium")
ids <- list.dirs(dir, recursive = FALSE)
ids <- ids[grep("^B[0-9]+", basename(ids))]
names(ids) <- gsub("_[0-9]*$", "", basename(ids))
spe <- read10xVisium(ids, type = "HDF5", images = "lowres")
# make barcodes/spot identifiers unique
spe$barcode <- colnames(spe); colnames(spe) <- 
    paste(spe$sample_id, spe$barcode, sep = ".")
# sparsify counts
y <- as.matrix(assay(spe))
y <- as(y, "dgCMatrix")
assay(spe) <- y
Code
md <- read.csv(file.path(dir, "metadata", "md.csv"))
md <- md[!is.na(md$Used.for.10x), ]
md$sample_id <- gsub("-", "_", md$B.number)
idx <- match(spe$sample_id, md$sample_id)
table(spe$TumorType <- factor(md$Tumor.type[idx]))

ccRCC  LSCC  LUAD 
 9564 16010  2158 
Code
csv1 <- file.path(dir, "metadata", "labels_all.csv")
csv2 <- file.path(dir, "metadata", "labels_tls.csv")
lab1 <- read.csv(csv1, row.names = 1)
lab2 <- read.csv(csv2, row.names = 1)
lab1$TLS[lab1$TLS == ""] <- NA
lab2$TLS[lab2$TLS == ""] <- NA
spe$anno1 <- factor(NA, unique(lab1$TLS))
spe$anno2 <- factor(NA, unique(lab2$TLS))
lab2 <- cbind(lab2, lab1[, c("Barcode", "Patient_ID")])
idx0 <- split(seq(ncol(spe)), spe$sample_id)
lab1 <- split(lab1, lab1$Patient_ID)
lab2 <- split(lab2, lab2$Patient_ID)
for (. in names(ids)) {
    bcs1 <- gsub("-[0-9]$", "-1", lab1[[.]]$Barcode)
    bcs2 <- gsub("-[0-9]$", "-1", lab2[[.]]$Barcode)
    bcs0 <- spe$barcode[idx0[[.]]]
    idx1 <- match(bcs0, bcs1)
    idx2 <- match(bcs0, bcs2)
    spe$anno1[idx0[[.]]] <- lab1[[.]]$TLS[idx1]
    spe$anno2[idx0[[.]]] <- lab2[[.]]$TLS[idx2]
}
table(spe$sample_id, spe$anno1)
           
             NOR  TUM INFL  TLS EXCL   LN
  B04_17776 1664  585  141  161    0    0
  B05_32288  574  355  136   80    0    0
  B05_8527   330 1089   40   43   87    0
  B06_24137 1137 1042   62  407    0   33
  B06_24784 1126  718  178  138    0    0
  B07_30616  266  491    6   32    0    0
  B07_38596  493  593   27   96  977    0
  B15_11190  160  364   39   27    0    0
Code
table(spe$sample_id, spe$anno2)[1:5, 1:6]
           
             NOR  TUM INFL 17776_MTLS3 17776_ETLS11 17776_ETLS12
  B04_17776 1664  585  140          28           22           27
  B05_32288  587  355  126           0            0            0
  B05_8527   330 1089   40           0            0            0
  B06_24137 1137 1042   62           0            0            0
  B06_24784 1125  718  173           0            0            0

Wrangling

Code
rd <- rowData(spe)
rd$ensembl <- rownames(spe)
rownames(spe) <- make.names(rd$symbol)
spatialCoordsNames(spe) <- c("x", "y")
spe$sample_id <- factor(spe$sample_id)
Code
spe <- logNormCounts(spe, transform = "none")
spe <- logNormCounts(spe, transform = "log")

Quality

Code
# add gene-(sample-) & spot-level QC summaries 
sub <- split(seq(ncol(spe)), spe$sample_id)
spe <- addPerFeatureQC(spe, subsets = sub)
spe <- addPerCellQC(spe)
# get tables of gene & spot metadata
rd <- .df(spe, margin = 1)
cd <- .df(spe, margin = 2)

Genes

Code
dr <- rd %>% 
    select(matches("subsets.*detected")) %>% 
    pivot_longer(everything()) %>% 
    mutate(sample_id = gsub("subsets_(.*)_detected", "\\1", name))
ggplot(dr, aes(value, col = sample_id)) &
    labs(x = "% of cells with non-zero count") &
    geom_density(key_glyph = "point") & scale_x_sqrt() & 
    guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) &
    theme_bw() & theme(aspect.ratio = 2/3, 
        panel.grid.minor = element_blank(), 
        legend.key.size = unit(0.5, "lines")) 

Spots

Code
ggplot(cd, aes(total, col = sample_id)) +
ggplot(cd, aes(detected, col = sample_id)) +
ggplot(cd, aes(total, col = anno1)) +
ggplot(cd, aes(detected, col = anno1)) +
    plot_layout(nrow = 2, guides = "collect") &
    geom_density(key_glyph = "point") & scale_x_log10() & 
    guides(color = guide_legend(override.aes = list(size = 3, alpha = 1))) &
    theme_bw() & theme(legend.key.size = unit(0.5, "lines")) 

Code
cd %>% 
  group_by(sample_id) %>% 
  mutate(total = .scale01(log10(total))) %>% 
  ggplot(aes(x, y, col = total)) +
  geom_point(shape = 16, size = 1) +
  facet_wrap(~ sample_id, nrow = 2) +
  scale_color_viridis_c() +
  coord_equal() + theme_void() +
  theme(legend.position = "none")

Code
cd %>% 
  ggplot(aes(x, y, col = anno1)) +
  geom_point(shape = 16, size = 1) +
  facet_wrap(~ sample_id, nrow = 2) +
  coord_equal() + theme_void() +
  theme(legend.position = "bottom") +
  guides(col = guide_legend(nrow = 1, 
      override.aes = list(size = 3)))

Code
ns <- as.data.frame(table(sample_id = spe$sample_id, anno1 = spe$anno1))
ggplot(ns, aes(anno1, Freq, fill = anno1)) +
    facet_wrap(~ sample_id, scales = "free_y", nrow = 2) +
    geom_bar(stat = "identity", position = "dodge") +
    labs(y = "# spots") + theme_bw() + theme(
        panel.grid.minor = element_blank(),
        legend.key.size = unit(0.5, "lines"),
        axis.text.x = element_text(angle = 45, hjust = 1))

Code
ggplot(ns, aes(sample_id, Freq, fill = sample_id)) +
    facet_wrap(~ anno1, scales = "free_y", nrow = 2) +
    geom_bar(stat = "identity", position = "dodge") +
    labs(y = "# spots") + theme_bw() + theme(
        panel.grid = element_blank(),
        legend.key.size = unit(0.5, "lines"),
        axis.text.x = element_text(angle = 45, hjust = 1))

Filtering

# drop genes/spots without 
# any detected spots/genes
y <- counts(spe) > 1
sub <- spe[
    rowSums(y) > 1,
    colSums(y) > 1]
cbind(
    raw = dim(spe), 
    fil = dim(sub))
       raw   fil
[1,] 17878 16643
[2,] 27732 27595

Appendix

Saving

Code
saveRDS(spe, "../outs/01-spe.rds")
sessionInfo()
R version 4.3.0 (2023-04-21)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.2.1

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Zurich
tzcode source: internal

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] SpatialExperiment_1.10.0    RColorBrewer_1.1-3         
 [3] patchwork_1.1.2             Matrix_1.6-0               
 [5] scater_1.28.0               ggplot2_3.4.2              
 [7] scran_1.28.2                scuttle_1.10.1             
 [9] SingleCellExperiment_1.22.0 SummarizedExperiment_1.30.2
[11] Biobase_2.60.0              GenomicRanges_1.52.0       
[13] GenomeInfoDb_1.36.1         IRanges_2.34.1             
[15] S4Vectors_0.38.1            BiocGenerics_0.46.0        
[17] MatrixGenerics_1.12.2       matrixStats_1.0.0          
[19] tidyr_1.3.0                 dplyr_1.1.2                

loaded via a namespace (and not attached):
 [1] bitops_1.0-7              gridExtra_2.3            
 [3] rlang_1.1.1               magrittr_2.0.3           
 [5] compiler_4.3.0            DelayedMatrixStats_1.22.1
 [7] vctrs_0.6.3               pkgconfig_2.0.3          
 [9] crayon_1.5.2              fastmap_1.1.1            
[11] magick_2.7.4              XVector_0.40.0           
[13] labeling_0.4.2            utf8_1.2.3               
[15] rmarkdown_2.23            ggbeeswarm_0.7.2         
[17] purrr_1.0.1               xfun_0.39                
[19] bluster_1.10.0            zlibbioc_1.46.0          
[21] beachmat_2.16.0           jsonlite_1.8.7           
[23] rhdf5filters_1.12.1       DelayedArray_0.26.6      
[25] Rhdf5lib_1.22.0           BiocParallel_1.34.2      
[27] irlba_2.3.5.1             parallel_4.3.0           
[29] cluster_2.1.4             R6_2.5.1                 
[31] limma_3.56.2              Rcpp_1.0.11              
[33] knitr_1.43                R.utils_2.12.2           
[35] igraph_1.5.0.1            tidyselect_1.2.0         
[37] rstudioapi_0.15.0         abind_1.4-5              
[39] yaml_2.3.7                viridis_0.6.4            
[41] codetools_0.2-19          lattice_0.21-8           
[43] tibble_3.2.1              withr_2.5.0              
[45] evaluate_0.21             pillar_1.9.0             
[47] generics_0.1.3            RCurl_1.98-1.12          
[49] sparseMatrixStats_1.12.2  munsell_0.5.0            
[51] scales_1.2.1              glue_1.6.2               
[53] metapod_1.8.0             tools_4.3.0              
[55] BiocNeighbors_1.18.0      ScaledMatrix_1.8.1       
[57] locfit_1.5-9.8            rhdf5_2.44.0             
[59] grid_4.3.0                DropletUtils_1.20.0      
[61] edgeR_3.42.4              colorspace_2.1-0         
[63] GenomeInfoDbData_1.2.10   beeswarm_0.4.0           
[65] BiocSingular_1.16.0       HDF5Array_1.28.1         
[67] vipor_0.4.5               cli_3.6.1                
[69] rsvd_1.0.5                fansi_1.0.4              
[71] S4Arrays_1.0.5            viridisLite_0.4.2        
[73] gtable_0.3.3              R.methodsS3_1.8.2        
[75] digest_0.6.33             ggrepel_0.9.3            
[77] dqrng_0.3.0               farver_2.1.1             
[79] rjson_0.2.21              htmlwidgets_1.6.2        
[81] htmltools_0.5.5           R.oo_1.25.0              
[83] lifecycle_1.0.3           statmod_1.5.0